Preparations

Load the necessary libraries

library(mgcv)      #for GAMs
library(gratia)    #for GAM plots
library(emmeans)   #for marginal means etc
library(broom)     #for tidy output
library(MuMIn)     #for model selection and AICc
library(lubridate) #for processing dates
library(mapdata)
library(maps)
library(tidyverse) #for data wrangling
library(DHARMa)    #for residual diagnostics
library(performance)
library(see)
library(sf)
library(stars)
library(rnaturalearth)
library(rnaturalearthdata)
library(raster)
library(ggspatial)
library(patchwork)

Scenario

Paruelo and Lauenroth (1996) analyzed the geographic distribution and the effects of climate variables on the relative abundance of a number of plant functional types (PFT’s) including shrubs, forbs, succulents (e.g. cacti), C3 grasses and C4 grasses. They used data from 73 sites across temperate central North America (see pareulo.csv) and calculated the relative abundance of C3 grasses at each site as a response variable

grass

Format of paruelo.csv data file

C3 LAT LONG MAP MAT JJAMAP DJFMAP
... ... ... ... ... ... ...
C3 - Relative abundance of C3 grasses at each site - response variable
LAT - Latitudinal coordinate
LONG - Longitudinal coordinate
MAP - Mean annual precipitation
MAT - Mean annual temperature
JJAMAP - Mean annual precipitation in June, July, August
DJFMAP - Mean annual precipitation in December, January, February

Read in the data

paruelo = read_csv('../public/data/paruelo.csv', trim_ws=TRUE)
glimpse(paruelo)
## Rows: 73
## Columns: 7
## $ C3     <dbl> 0.65, 0.65, 0.76, 0.75, 0.33, 0.03, 0.00, 0.02, 0.05, 0.05, 0.…
## $ LAT    <dbl> 46.40, 47.32, 45.78, 43.95, 46.90, 38.87, 32.62, 36.95, 35.30,…
## $ LONG   <dbl> 119.55, 114.27, 110.78, 101.87, 102.82, 99.38, 106.75, 96.55, …
## $ MAP    <dbl> 199, 469, 536, 476, 484, 623, 259, 969, 542, 421, 446, 376, 66…
## $ MAT    <dbl> 12.4, 7.5, 7.2, 8.2, 4.8, 12.0, 14.5, 15.3, 13.9, 8.5, 5.1, 11…
## $ JJAMAP <dbl> 0.12, 0.24, 0.24, 0.35, 0.40, 0.40, 0.47, 0.30, 0.44, 0.31, 0.…
## $ DJFMAP <dbl> 0.45, 0.29, 0.20, 0.15, 0.14, 0.11, 0.17, 0.14, 0.13, 0.14, 0.…

Exploratory data analysis

We will focus on the spatial components.

Model formula: \[ y_i \sim{} \mathcal{N}(\mu_i, \sigma^2)\\ \mu_i =\beta_0 + f(Long_i) + f(Lat_i) + f(Long_i, Lat_i) \]

where \(\beta_0\) is the y-intercept. \(f(Lat)\) and \(f(Long)\) indicate the additive smoothing functions of the spatial predictors.

Fit the model

Partial plots

s(LONG,LAT)

plot

plot(paruelo.gam1,scheme = 2)

vis.gam

vis.gam(paruelo.gam1, theta=30)

draw

draw(paruelo.gam1)

te(LONG,LAT)

plot

plot(paruelo.gam2,scheme = 2)

vis.gam

vis.gam(paruelo.gam2, theta=30)

draw

draw(paruelo.gam2)

ti(LONG,LAT)

plot

plot(paruelo.gam3,scheme = 2)

vis.gam

vis.gam(paruelo.gam3, theta=30)

draw

draw(paruelo.gam3)

Model investigation / hypothesis testing

s(LONG,LAT)

summary(paruelo.gam1)
## 
## Family: Beta regression(4.304) 
## Link function: logit 
## 
## Formula:
## mC3 ~ s(LONG, LAT)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.1021     0.1081  -10.19   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df Chi.sq p-value    
## s(LONG,LAT) 4.848  6.815  59.88  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.455   Deviance explained = 56.3%
## -REML = -45.112  Scale est. = 1         n = 73

Conclusions:

  • in the very center of the sampling domain (average longitude and latitude), the expected percentage cover of C3 grasses is -1.1 (link scale). If we back-transform to the response scale, this is 24.93%
  • there is evidence that the abundance of C3 grases varies non-linearly over the spatial extent of the sampling domain.
  • the model explains 56.28% of the total deviance.
tidy(paruelo.gam1)

te(LONG,LAT)

summary(paruelo.gam2)
## 
## Family: Beta regression(4.366) 
## Link function: logit 
## 
## Formula:
## mC3 ~ te(LONG, LAT)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.1120     0.1077  -10.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df Chi.sq p-value    
## te(LONG,LAT)   3      3  62.04  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.466   Deviance explained = 55.4%
## -REML = -52.794  Scale est. = 1         n = 73

Conclusions:

  • in the very center of the sampling domain (average longitude and latitude), the expected percentage cover of C3 grasses is -1.11 (link scale). If we back-transform to the response scale, this is 24.75%
  • there is evidence that the abundance of C3 grases varies non-linearly over the spatial extent of the sampling domain.
  • the model explains 55.42% of the total deviance.
tidy(paruelo.gam2)

ti(LONG,LAT)

summary(paruelo.gam3)
## 
## Family: Beta regression(5.099) 
## Link function: logit 
## 
## Formula:
## mC3 ~ ti(LONG) + ti(LAT) + ti(LONG, LAT)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2330     0.1151  -10.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df Chi.sq p-value    
## ti(LONG)     1.000  1.000  0.378 0.53843    
## ti(LAT)      1.000  1.000 73.604 < 2e-16 ***
## ti(LONG,LAT) 3.523  3.875 15.070 0.00298 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.535   Deviance explained = 63.6%
## -REML =  -52.2  Scale est. = 1         n = 73

Conclusions:

  • in the very center of the sampling domain (average longitude and latitude), the expected percentage cover of C3 grasses is -1.23 (link scale). If we back-transform to the response scale, this is 22.57%
  • at the average latitude, there is no evidence of a longitudinal shift in C3 percentage cover.
  • at the average longitude, there is evidence that C3 percentage cover varies non-linearly from north to south.
  • there is evidence that the abundance of C3 grases varies non-linearly over the spatial extent of the sampling domain.
  • the model explains 63.62% of the total deviance.
tidy(paruelo.gam3)

Summary figures

s(LONG,LAT)

paruelo.list = with(paruelo,
                    list(LAT=seq(min(LAT), max(LAT), len=100),
                         LONG=seq(min(LONG), max(LONG), len=100)))
newdata = emmeans(paruelo.gam1, ~LONG+LAT, at=paruelo.list, type='response') %>%
  as.data.frame
newdata %>% head
ggplot(newdata, aes(y=LAT, x=LONG)) +
  geom_tile(aes(fill=response)) +
  geom_contour(aes(z=response)) +
  scale_fill_gradientn(colors=heat.colors(10)) +
  geom_point(data=paruelo,aes(fill=C3), shape=21, size=5) +
  coord_equal()

newdata.sf <- rasterFromXYZ(newdata[, c("LONG", "LAT", "response")]) %>% 
  st_as_stars() %>%
  st_set_crs(st_crs(usa))
## OR
#newdata.sf <- newdata %>% 
#  st_as_sf(coords=c("LONG", "LAT"),  crs=st_crs(usa)) %>%
#  st_rasterize()
ggplot() +
  geom_sf(data=usa) +
  geom_stars(data=newdata.sf)

newdata.sf <- rasterFromXYZ(newdata[, c("LONG", "LAT", "response")]) %>%
  mask(usa) %>%
  st_as_stars() %>% 
  st_set_crs(st_crs(usa))

ggplot() +
  geom_sf(data=usa) +
  geom_stars(data=newdata.sf) +
  scale_fill_gradientn(colours=heat.colors(10), na.value=NA) + 
  geom_sf(data=paruelo.sf, aes(fill=C3), shape=21,  size=4) +
  annotation_scale(location = "bl", width_hint = 0.25) +
  annotation_north_arrow(location = "bl", which_north = "true", 
        pad_x = unit(0.25, "in"), pad_y = unit(0.2, "in"),
        style = north_arrow_fancy_orienteering) +
  coord_sf(expand=FALSE) +
  theme_bw()

te(LONG,LAT)

paruelo.list = with(paruelo,
                    list(LAT=seq(min(LAT), max(LAT), len=100),
                         LONG=seq(min(LONG), max(LONG), len=100)))
newdata = emmeans(paruelo.gam2, ~LONG+LAT, at=paruelo.list, type='response') %>%
  as.data.frame
newdata %>% head
ggplot(newdata, aes(y=LAT, x=LONG)) +
  geom_tile(aes(fill=response)) +
  geom_contour(aes(z=response)) +
  scale_fill_gradientn(colors=heat.colors(10)) +
  geom_point(data=paruelo,aes(fill=C3), shape=21, size=5) +
  coord_equal()

newdata.sf <- rasterFromXYZ(newdata[, c("LONG", "LAT", "response")]) %>% 
  st_as_stars() %>%
  st_set_crs(st_crs(usa))
## OR
#newdata.sf <- newdata %>% 
#  st_as_sf(coords=c("LONG", "LAT"),  crs=st_crs(usa)) %>%
#  st_rasterize()
ggplot() +
  geom_sf(data=usa) +
  geom_stars(data=newdata.sf)

newdata.sf <- rasterFromXYZ(newdata[, c("LONG", "LAT", "response")]) %>%
  mask(usa) %>%
  st_as_stars() %>% 
  st_set_crs(st_crs(usa))

ggplot() +
  geom_sf(data=usa) +
  geom_stars(data=newdata.sf) +
  scale_fill_gradientn(colours=heat.colors(10), na.value=NA) + 
  geom_sf(data=paruelo.sf, aes(fill=C3), shape=21,  size=4) +
  annotation_scale(location = "bl", width_hint = 0.25) +
  annotation_north_arrow(location = "bl", which_north = "true", 
        pad_x = unit(0.25, "in"), pad_y = unit(0.2, "in"),
        style = north_arrow_fancy_orienteering) +
  coord_sf(expand=FALSE) +
  theme_bw()

ti(LONG,LAT)

paruelo.list = with(paruelo,
                    list(LAT=seq(min(LAT), max(LAT), len=100),
                         LONG=seq(min(LONG), max(LONG), len=100)))
newdata = emmeans(paruelo.gam3, ~LONG+LAT, at=paruelo.list, type='response') %>%
  as.data.frame
newdata %>% head
ggplot(newdata, aes(y=LAT, x=LONG)) +
  geom_tile(aes(fill=response)) +
  geom_contour(aes(z=response)) +
  scale_fill_gradientn(colors=heat.colors(10)) +
  geom_point(data=paruelo,aes(fill=C3), shape=21, size=5) +
  coord_equal()

newdata.sf <- rasterFromXYZ(newdata[, c("LONG", "LAT", "response")]) %>% 
  st_as_stars() %>%
  st_set_crs(st_crs(usa))
## OR
#newdata.sf <- newdata %>% 
#  st_as_sf(coords=c("LONG", "LAT"),  crs=st_crs(usa)) %>%
#  st_rasterize()
ggplot() +
  geom_sf(data=usa) +
  geom_stars(data=newdata.sf)

newdata.sf <- rasterFromXYZ(newdata[, c("LONG", "LAT", "response")]) %>%
  mask(usa) %>%
  st_as_stars() %>% 
  st_set_crs(st_crs(usa))

ggplot() +
  geom_sf(data=usa) +
  geom_stars(data=newdata.sf) +
  scale_fill_gradientn(colours=heat.colors(10), na.value=NA) + 
  geom_sf(data=paruelo.sf, aes(fill=C3), shape=21,  size=4) +
  annotation_scale(location = "bl", width_hint = 0.25) +
  annotation_north_arrow(location = "bl", which_north = "true", 
        pad_x = unit(0.25, "in"), pad_y = unit(0.2, "in"),
        style = north_arrow_fancy_orienteering) +
  coord_sf(expand=FALSE) +
  theme_bw()

Paruelo, J. M., and W. K. Lauenroth. 1996. “Relative Abundance of Plant Functional Types in Grasslands and Shrublands of North America.” Ecologicalapplications, no. 6: 1212–24.